Consider the cohort as a single group to start. Do not stratify by case-control status.
Stack the heatmaps for CD4. Are there obvious patterns to the antigens?
Stack the heatmaps for NOTCD4. Are there patterns? Are these patterns different from CD4?
Calculate FS/PFS for each stimulation in CD4 and compare side-by-side in boxplots and test by ANOVA. Is there a clear winner? Calculate FS/PFS for each stimulation in NOTCD4 and compare with boxplot and ANOVA. Is there a winner? Is this different than CD4?
It may also make sense to make a corrplot of the FS/PFS from the different stims. This is asking a different question though, i.e. whether a patient’s cytokine responses tend to be consistently elevated/not-elevated across multiple stims.
Regress FS/PFS against age (linear). Is there an effect of age on T cell functionality? Regress FS/PFS against days from symptom onset (linear). Is there a decrease in functionality with time? Stratify FS/PFS by sex and test by t-test (or Mann-Whitney, though sample size is large enough). Is there a difference?
Only then would you start asking the case-control questions.
library(here)
library(tidyverse)
library(COMPASS)
library(ggplot2)
library(data.table)
library(broom)
library(psych)
library(corrplot)
library(ggpubr)
library(knitr)
library(kableExtra)
library(cowplot)
library(purrr)
library(flowWorkspace)
library(GGally)
library(patchwork)
library(RColorBrewer)
library(ComplexHeatmap)
library(ggbeeswarm)
source(here::here("scripts/20200604_Helper_Functions.R"))
save_output <- FALSE
# Arial font setup. Downloaded afms from https://github.com/microsoft/microsoft-r-open/tree/ec3fd89e5fb5794bd8149905c134ad801bb61800
Arial <- Type1Font(family = "Arial",
metrics = c(here::here("data/Arial_afm/ArialMT.afm"),
here::here("data/Arial_afm/ArialMT-Bold.afm"),
here::here("data/Arial_afm/ArialMT-Italic.afm"),
here::here("data/Arial_afm/ArialMT-BoldItalic.afm")))
pdfFonts(Arial = Arial)
# Next 4 lines from Run_COMPASS.R
stims_for_compass_runs <- c("VEMP", "Spike 1", "Spike 2", "NCAP")
parent_nodes_for_compass_runs <- c("4+", "NOT4+", "8+")
stims_for_compass_runs_rep <- rep(stims_for_compass_runs, each = length(parent_nodes_for_compass_runs))
parent_nodes_for_compass_runs_rep <- rep(parent_nodes_for_compass_runs, times = length(stims_for_compass_runs))
crList <- purrr::pmap(.l = list(parent_nodes_for_compass_runs_rep,
stims_for_compass_runs_rep),
.f = function(parent, currentStim) {
currentStimForFile <- gsub(" ", "_", currentStim)
crPath <- here::here(sprintf("out/CompassOutput/%s/%s/COMPASSResult_%s_%s.rds", parent, currentStimForFile, parent, currentStimForFile))
readRDS(crPath)
}) %>%
set_names(gsub("+", "", gsub(" ", "_", paste0(parent_nodes_for_compass_runs_rep, "_", stims_for_compass_runs_rep)), fixed=TRUE))
names(crList)
## [1] "4_VEMP" "NOT4_VEMP" "8_VEMP" "4_Spike_1" "NOT4_Spike_1"
## [6] "8_Spike_1" "4_Spike_2" "NOT4_Spike_2" "8_Spike_2" "4_NCAP"
## [11] "NOT4_NCAP" "8_NCAP"
CD4RunNames <- c("4_Spike_1", "4_Spike_2", "4_NCAP", "4_VEMP")
NotCD4RunNames <- c("NOT4_Spike_1", "NOT4_Spike_2", "NOT4_NCAP", "NOT4_VEMP")
CD8RunNames <- c("8_Spike_1", "8_Spike_2", "8_NCAP", "8_VEMP")
plotCOMPASSResultStack(crList[CD4RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[NotCD4RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "Not-CD4 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[CD8RunNames],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD8 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames)],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4 and Not-CD4 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[c(CD4RunNames, CD8RunNames)],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4 and CD8 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa
plotCOMPASSResultStack(crList[c(CD4RunNames, NotCD4RunNames, CD8RunNames)],
row_annotation = c("Stim"),
variable = "Stim",
show_rownames = FALSE,
main = "CD4, Not-CD4, and CD8 COMPASS Runs",
fontsize = 15, fontsize_row = 15, fontsize_col = 13)
## The 'threshold' filter has removed 7 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa, !CD107a&CD154&!IFNg&!IL17a&IL2&IL4/5/13&TNFa
For this next part, drop the assay controls (healthies & TRIMAs)
# First change any NA Cohort levels to "Healthy"
crList2 <- lapply(crList,
function(cr) {
cr$data$meta$Cohort <- factor(ifelse(cr$data$meta$Cohort %in%
c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", cr$data$meta$Cohort),
levels = rev(c("Healthy", "Non-hospitalized", "Hospitalized")))
cr
})
names(crList2) <- names(crList)
crList.no_healthy <- lapply(names(crList2),
function(cr_name) {
cr <- crList2[[cr_name]]
print(sprintf("Accessing %s", cr_name))
cr$data$meta <- cr$data$meta %>%
# Filter out samples which weren't run through COMPASS
dplyr::filter(!(`SAMPLE ID` %in% setdiff(cr$data$meta$`SAMPLE ID`, rownames(cr$fit$mean_gamma))))
stopifnot(all.equal(rownames(cr$fit$mean_gamma), cr$data$meta$`SAMPLE ID`))
# And make sure that the count data only includes counts for the samples which were run through COMPASS
stopifnot(all.equal(rownames(cr$data$n_s), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(rownames(cr$data$n_u), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(names(cr$data$counts_s), rownames(cr$fit$mean_gamma)))
stopifnot(all.equal(names(cr$data$counts_u), rownames(cr$fit$mean_gamma)))
not_healthy_idx <- which(cr$data$meta$Cohort != "Healthy")
cr$data$meta <- cr$data$meta[not_healthy_idx,] %>%
mutate(Cohort = factor(Cohort, levels = c("Non-hospitalized", "Hospitalized")))
cr$fit$mean_gamma <- cr$fit$mean_gamma[not_healthy_idx,]
cr$data$n_s <- cr$data$n_s[not_healthy_idx,]
cr$data$n_u <- cr$data$n_u[not_healthy_idx,]
cr$data$counts_s <- cr$data$counts_s[not_healthy_idx]
cr$data$counts_u <- cr$data$counts_u[not_healthy_idx]
cr
})
## [1] "Accessing 4_VEMP"
## [1] "Accessing NOT4_VEMP"
## [1] "Accessing 8_VEMP"
## [1] "Accessing 4_Spike_1"
## [1] "Accessing NOT4_Spike_1"
## [1] "Accessing 8_Spike_1"
## [1] "Accessing 4_Spike_2"
## [1] "Accessing NOT4_Spike_2"
## [1] "Accessing 8_Spike_2"
## [1] "Accessing 4_NCAP"
## [1] "Accessing NOT4_NCAP"
## [1] "Accessing 8_NCAP"
names(crList.no_healthy) <- names(crList2)
# Read in the patient manifest with complete data (though it's also in the COMPASSResult object)
data_manifest <- read.csv(here::here("data/Seshadri_HAARVI_PBMC_manifest_merged_11June2020.csv"), check.names = F, stringsAsFactors = F)
# Remember 116C and 37C didn't get run through COMPASS
# 75C was not run at all
# 07B/551432 didn't get run for Spike 2
# For CD8 only: "BWT20", "15548" didn't get run for "Spike 2", "NCAP", and 15530 didn't get run for "Spike 2"
fs_pfs_df_with_manifest <- lapply(c(CD4RunNames, CD8RunNames), function(n) {
as.data.frame(COMPASS::FunctionalityScore(crList[[n]])) %>%
rownames_to_column("SAMPLE ID") %>%
rename(FS = `COMPASS::FunctionalityScore(crList[[n]])`) %>%
left_join(as.data.frame(COMPASS::PolyfunctionalityScore(crList[[n]])) %>%
rownames_to_column("SAMPLE ID") %>%
rename(PFS = `COMPASS::PolyfunctionalityScore(crList[[n]])`),
by = "SAMPLE ID") %>%
mutate(COMPASS_run = n)
} ) %>%
data.table::rbindlist() %>%
separate(COMPASS_run, into = c("parent", "STIM"), remove = F, extra = "merge") %>%
mutate(STIM = factor(recode(STIM, "Spike_1" = "S1", "Spike_2" = "S2", "NCAP" = "N", "VEMP" = "E"),
levels = c("S1", "S2", "N", "E"))) %>%
mutate("Record ID" = dplyr::recode(`SAMPLE ID`,
"HS09" = "HS9")) %>% # also, 75C did not get run
full_join(data_manifest, by = c("Record ID" = "Record ID")) %>%
dplyr::filter(!(`Record ID` %in% c("116C", "37C", "75C"))) %>%
mutate(Cohort = factor(ifelse(Cohort %in%
c(NA, "Healthy control", "Healthy control 2017-2018"), "Healthy", Cohort),
levels = c("Hospitalized", "Non-hospitalized", "Healthy"))) %>%
# drop the healthies anyway
dplyr::filter(Cohort != "Healthy") %>%
mutate(Days_Symptom_Onset_to_Visit_1 = as.numeric(`Days symptom onset to visit 1`)) %>%
dplyr::select("SAMPLE ID", "FS", "PFS", "COMPASS_run", "parent", "STIM",
"Cohort", "Age", "Sex", "Race_v2", "Days_Symptom_Onset_to_Visit_1")
unique(fs_pfs_df_with_manifest$Cohort)
## [1] Hospitalized Non-hospitalized
## Levels: Hospitalized Non-hospitalized Healthy
fs_pfs_df_with_manifest %>% dplyr::filter(is.na(FS) | is.na(`SAMPLE ID`) | is.na(STIM) | is.na(Cohort))
## Empty data.table (0 rows and 11 cols): SAMPLE ID,FS,PFS,COMPASS_run,parent,STIM...
# Note removal of points for quade.test in order to have complete block design
# CD4: remove 551432/07B
# And for CD8 need to remove more individuals for complete block design:
# !(`SAMPLE ID` %in% c("BWT20", "15548", "15530"))
# Statistics is calculated after exclusion, but all points are plotted
plot_fs_vs_stim <- function(current_parent) {
StimColors <- c("S1" = "#f0027fac", "S2" = "#fdc086ac", "N" = "#beaed4ac", "E" = "#e6e63cac")
StimOutlineColors <- c("S1" = "#f0027fdf", "S2" = "#fdc086df", "N" = "#beaed4df", "E" = "#e6e63cdf")
current_data_for_test <- fs_pfs_df_with_manifest %>%
dplyr::filter(parent == current_parent) %>%
pivot_wider(id_cols = c("SAMPLE ID"), names_from = STIM, values_from = FS) %>%
# Remove any individuals which don't have data for all 4 stims, for the purposes of having complete data for the test
na.omit() %>%
# For the pairwise wilcox test, make the data long again
pivot_longer(cols = c(S1, S2, N, E), names_to = "STIM", values_to = "FS") %>%
# mutate(STIM = factor(STIM, levels = c("S1", "S2", "N", "E"))) %>%
# It should already be in order, but make sure the SAMPLE IDs are in the same order for each STIM
arrange(`SAMPLE ID`, STIM)
quade_test <- quade.test(FS ~ STIM | `SAMPLE ID`, data = current_data_for_test)
pwt <- pairwise.wilcox.test(current_data_for_test$FS, current_data_for_test$STIM, p.adjust.method = "bonferroni", paired = TRUE)
# For the plot, don't filter out individuals who don't have data for all 4 stims
# This may result in inconsistency between the test results and the plot, but it should be pretty similar
current_data_for_plot <- fs_pfs_df_with_manifest %>%
dplyr::filter(parent == current_parent) %>%
mutate(STIM = factor(STIM, levels = c("S1", "S2", "N", "E")))
current_plot <- ggplot(current_data_for_plot, aes(STIM, FS)) +
theme_bw(base_size = 22) +
geom_hline(yintercept = 0, linetype="dashed", alpha = 0.5) +
geom_violin(aes(fill = `STIM`), draw_quantiles = c(0.5),
# All violins have the same maximum width. Overrides default of equal area
scale="width", width = 0.6) +
geom_violin(fill="transparent", draw_quantiles = c(0.25, 0.75), linetype = "dashed",
scale="width", width = 0.6) +
geom_violin(aes(color=`STIM`), fill="transparent",
scale="width", width=0.6, size=1.1) +
geom_quasirandom(size=0.1, width=0.2, varwidth=T, method="quasirandom") +
theme(axis.title.x = element_blank(),
axis.title.y = element_text(size=27),
axis.text.y = element_text(color="black", size=20),
axis.text.x = element_text(color="black", size=26),
plot.title = element_text(hjust = 0.5, size=29),
panel.grid = element_blank(),
legend.position = "none",
plot.margin = margin(0.3, 0.2, 0.1, 0.2, "cm")) +
# scale_x_discrete(expand = c(0.3, 0.3)) +
labs(y = "Functionality Score",
title = sprintf("CD%s", current_parent)) +
scale_fill_manual(values = StimColors) +
scale_color_manual(values = StimOutlineColors)
list("plot" = current_plot,
"quade_test" = quade_test,
"wilcox_test" = pwt)
}
cd4_fs_vs_stim_out <- plot_fs_vs_stim("4")
cd4_fs_vs_stim_out$quade_test$p.value
## [1] 6.408814e-32
ggplot_build(cd4_fs_vs_stim_out$plot)$layout$panel_params[[1]]$y.range
## [1] -0.005442067 0.114283406
annotation_df <- broom::tidy(cd4_fs_vs_stim_out$wilcox_test) %>%
dplyr::filter(p.value < 0.05) %>%
mutate(y_pos = c(0.118, 0.158, 0.145, 0.132, 0.114, 0.1),
p.text = if_else(p.value < 0.001, "p<0.001", paste0("p=", round(p.value, 3))))
cd4_fs_vs_stim_plot <- cd4_fs_vs_stim_out$plot +
coord_cartesian(ylim = c(NA, 0.164)) +
suppressWarnings(ggsignif::geom_signif(inherit.aes=F,data=annotation_df,
aes_string(xmin="group1", xmax="group2", annotations="p.text", y_position="y_pos"),
tip_length = c(0.005, 0.005),
textsize=5,
manual = TRUE))
cd4_fs_vs_stim_plot
cd8_fs_vs_stim_out <- plot_fs_vs_stim("8")
cd8_fs_vs_stim_out$quade_test$p.value
## [1] 5.442791e-08
ggplot_build(cd8_fs_vs_stim_out$plot)$layout$panel_params[[1]]$y.range
## [1] -0.002139902 0.044937933
annotation_df <- broom::tidy(cd8_fs_vs_stim_out$wilcox_test) %>%
dplyr::filter(p.value < 0.05) %>%
mutate(y_pos = c(0.069, 0.061, 0.053, 0.045),
p.text = if_else(p.value < 0.001, "p<0.001", paste0("p=", round(p.value, 3))))
cd8_fs_vs_stim_plot <- cd8_fs_vs_stim_out$plot +
coord_cartesian(ylim = c(NA, 0.073)) +
suppressWarnings(ggsignif::geom_signif(inherit.aes=F,data=annotation_df,
aes_string(xmin="group1", xmax="group2", annotations="p.text", y_position="y_pos"),
tip_length = c(0.005, 0.005),
textsize=5,
manual = TRUE))
cd8_fs_vs_stim_plot
# Note that the above data are paired, even though it's represented as violin plots.
if(save_output) {
pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig4A_CD4_FS_vs_Stim.pdf"), width=5, height=4,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
cd4_fs_vs_stim_plot
dev.off()
pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig5F_CD8_FS_vs_Stim.pdf"), width=5, height=4,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
cd8_fs_vs_stim_plot
dev.off()
}
As we saw in the heatmaps, NCAP and Spike 1 consistently have the highest FS/PFS scores, and VEMP has the lowest.
# StimColors <- c("S1" = "#f0027f", "S2" = "#fdc086", "N" = "#beaed4", "E" = "#e6e63c")
StimColors <- c("S1" = "#f0027fac", "S2" = "#fdc086ac", "N" = "#beaed4ac", "E" = "#e6e63cac")
fs_vs_x_plot <- function(current_parent, current_stim, xaxis="Age", include_regression_line = TRUE) {
current_data <- fs_pfs_df_with_manifest %>%
dplyr::filter(parent == !!current_parent & STIM == !!current_stim)
current_plot <- ggplot(current_data, aes(!!as.symbol(xaxis), FS)) +
theme_bw(base_size = 22) +
geom_point(fill=StimColors[[current_stim]], pch=21, size=2.5) +
theme(axis.title.x = element_text(size=20),
axis.title.y = element_text(size=20),
axis.text.y = element_text(color="black", size=17),
axis.text.x = element_text(color="black", size=17),
plot.title = element_text(hjust=0.5, size=21),
panel.grid = element_blank(),
legend.position = "none",
plot.margin = margin(0.3, 0.4, 0.1, 0.2, "cm")) +
labs(title = current_stim,
x = sub("([A-Za-z]*)_.*", "\\1", xaxis))
if(include_regression_line) {
current_plot <- current_plot +
# Linear regression line with 95% CI (based on "standard error of predicted means")
# By "predicted means", we're talking about the regression line's y-value at each value of x
# Pretty sure the grey 95% CI is the "confidence interval of the prediction", as discussed here: https://statisticsbyjim.com/glossary/confidence-interval-prediction/
# So it's not about individual points (a prediction interval), but rather the accuracy of the predicted mean y-value at each x
# Help understanding CI from ?predict.lm and here: https://stackoverflow.com/questions/45742987/how-is-level-used-to-generate-the-confidence-interval-in-geom-smooth
geom_smooth(color = "black", method="lm") +
# stat_cor from ggpubr
stat_cor(method = "pearson", size=5.5)
}
current_plot
}
parents_vec <- rep(c("4", "8"), each = 4)
stim_vec <- rep(c("S1", "S2", "N", "E"), times = 2)
fs_vs_age_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot)
names(fs_vs_age_plot_list) <- paste0(stim_vec, "_", parents_vec)
# p-values NOT adjusted:
fs_vs_age_plot_list
## $S1_4
## `geom_smooth()` using formula 'y ~ x'
##
## $S2_4
## `geom_smooth()` using formula 'y ~ x'
##
## $N_4
## `geom_smooth()` using formula 'y ~ x'
##
## $E_4
## `geom_smooth()` using formula 'y ~ x'
##
## $S1_8
## `geom_smooth()` using formula 'y ~ x'
##
## $S2_8
## `geom_smooth()` using formula 'y ~ x'
##
## $N_8
## `geom_smooth()` using formula 'y ~ x'
##
## $E_8
## `geom_smooth()` using formula 'y ~ x'
fs_vs_days_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot, xaxis="Days_Symptom_Onset_to_Visit_1")
names(fs_vs_days_plot_list) <- paste0(stim_vec, "_", parents_vec)
fs_vs_days_plot_list
## $S1_4
## `geom_smooth()` using formula 'y ~ x'
##
## $S2_4
## `geom_smooth()` using formula 'y ~ x'
##
## $N_4
## `geom_smooth()` using formula 'y ~ x'
##
## $E_4
## `geom_smooth()` using formula 'y ~ x'
##
## $S1_8
## `geom_smooth()` using formula 'y ~ x'
##
## $S2_8
## `geom_smooth()` using formula 'y ~ x'
##
## $N_8
## `geom_smooth()` using formula 'y ~ x'
##
## $E_8
## `geom_smooth()` using formula 'y ~ x'
StimSexColors <- list("S1" = c("F" = "#bd0264ac", "M" = "#fd2898ac"),
"S2" = c("F" = "#fca654ac", "M" = "#fedab8ac"),
"N" = c("F" = "#a38dc2ac", "M" = "#d9cfe6ac"),
"E" = c("F" = "#d4d41bac", "M" = "#ecec69ac"))
StimSexOutlineColors <- list("S1" = c("F" = "#bd0264ff", "M" = "#fd2898ff"),
"S2" = c("F" = "#fca654ff", "M" = "#fedab8ff"),
"N" = c("F" = "#a38dc2ff", "M" = "#d9cfe6ff"),
"E" = c("F" = "#d4d41bff", "M" = "#ecec69ff"))
fs_vs_x_plot_categorical <- function(current_parent, current_stim, xaxis="Sex",
current_fill_colors=StimSexColors[[current_stim]],
current_outline_colors = StimSexOutlineColors[[current_stim]],
xcategorylabs = NULL) {
# current_parent <- "4"; current_stim <- "S1"; xaxis <- "Sex"; current_colors <- StimSexColors[[current_stim]]
# CohortColors <- c("Hospitalized" = "#386cb0", "Non-hospitalized" = "#7fc97f", "Healthy" = "#ebeef0")
# CohortLabs <- c("Non-hospitalized" = "NH", "Hospitalized" = "H")
# current_parent <- "8"; current_stim <- "E"; xaxis <- "Cohort"; current_colors <- CohortColors; xcategorylabs <- CohortLabs
current_data <- fs_pfs_df_with_manifest %>%
dplyr::filter(parent == !!current_parent & STIM == !!current_stim)
current_plot <- ggplot(current_data, aes(!!as.symbol(xaxis), FS)) +
theme_bw(base_size = 22) +
geom_violin(aes(fill = !!as.symbol(xaxis)), draw_quantiles = c(0.5),
# All violins have the same maximum width. Overrides default of equal area
scale="width", width = 0.6) +
geom_violin(fill="transparent", draw_quantiles = c(0.25, 0.75), linetype = "dashed",
scale="width", width = 0.6) +
geom_violin(aes(color=!!as.symbol(xaxis)), fill="transparent",
scale="width", width=0.6, size=1.1) +
geom_quasirandom(size=0.1, width=0.2, varwidth=T, method="quasirandom") +
theme(axis.title.x = element_blank(),
axis.title.y = element_text(size=20),
axis.text.y = element_text(color="black", size=17),
axis.text.x = element_text(color="black", size=20),
plot.title = element_text(hjust = 0.5, size=21),
panel.grid = element_blank(),
legend.position = "none",
plot.margin = margin(0.3, 0.2, 0.1, 0.2, "cm")) +
labs(title = current_stim) + #,
# x = sub("([A-Za-z]*)_.*", "\\1", xaxis)) +
scale_fill_manual(values = current_fill_colors) +
scale_color_manual(values = current_outline_colors)
wilcox_result <- wilcox.test(as.formula(sprintf("FS ~ %s", xaxis)), data = current_data)
p.text <- if(wilcox_result$p.value < 0.001) {
"p<0.001"
} else {
paste0("p=", round(wilcox_result$p.value, 3))
}
y_range <- ggplot_build(current_plot)$layout$panel_params[[1]]$y.range
current_plot <- current_plot +
coord_cartesian(ylim = c(NA, y_range[[2]] + 0.07*diff(y_range))) +
annotate("text", x = 1.5, y = y_range[[2]] + 0.01*diff(y_range), label = p.text, size=5.5)
if(!is.null(xcategorylabs)) {
current_plot <- current_plot +
scale_x_discrete(labels=xcategorylabs)
}
current_plot
}
parents_vec <- rep(c("4", "8"), each = 4)
stim_vec <- rep(c("S1", "S2", "N", "E"), times = 2)
fs_vs_sex_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot_categorical)
## Warning in wilcox.test.default(x = c(0.00823543307086614, 0.00798326771653543, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.000123818897637795,
## 4.3503937007874e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.00817322834645669, 0.0078740157480315, :
## cannot compute exact p-value with ties
names(fs_vs_sex_plot_list) <- paste0(stim_vec, "_", parents_vec)
fs_vs_sex_plot_list
## $S1_4
##
## $S2_4
##
## $N_4
##
## $E_4
##
## $S1_8
##
## $S2_8
##
## $N_8
##
## $E_8
In many of the boxplots above, Males are shifted slightly higher on the FS/PFS axis compared to Females, but none of the comparisons are significant.
supp_tbl_1 <- read.csv(here::here("data/T-cell Supplemental Table 1_20200922_MS.csv"), check.names = F, stringsAsFactors = F)
fs_pfs_df_with_manifest <- fs_pfs_df_with_manifest %>%
full_join(supp_tbl_1 %>%
mutate(Comorbidities_Bool = factor(ifelse(Comorbidities %in% c("None of the above", "N/A"), "Absent", "Present"),
levels = c("Present", "Absent"))) %>%
dplyr::select("Study ID", "Comorbidities_Bool"),
by = c("SAMPLE ID" = "Study ID"))
# We don't have COMPASS data on all individuals, so this is not the official table:
fs_pfs_df_with_manifest %>%
dplyr::select("SAMPLE ID", Cohort, Comorbidities_Bool) %>%
unique() %>%
dplyr::select(-"SAMPLE ID") %>%
table()
## Comorbidities_Bool
## Cohort Present Absent
## Hospitalized 13 7
## Non-hospitalized 7 30
## Healthy 0 0
# supp_tbl_1_w_FS <- supp_tbl_1 %>%
# mutate(Comorbidities_Bool = ifelse(Comorbidities %in% c("None of the above", "N/A"), "Absent", "Present")) %>%
# full_join(fs_pfs_df_wide, by = c("Study ID" = "SAMPLE ID")) %>%
# mutate(Cohort_int = recode(Cohort, "Non-hospitalized" = 0, "Hospitalized" = 1)) %>%
# dplyr::select("Study ID", "Cohort", "Cohort_int", "Comorbidities_Bool", "FS_S1_4", "FS_S2_4", "FS_N_4")
StimComorbidColors <- list("S1" = c("Present" = "#bd0264ac", "Absent" = "#fd2898ac"),
"S2" = c("Present" = "#fca654ac", "Absent" = "#fedab8ac"),
"N" = c("Present" = "#a38dc2ac", "Absent" = "#d9cfe6ac"),
"E" = c("Present" = "#d4d41bac", "Absent" = "#ecec69ac"))
StimComorbidOutlineColors <- list("S1" = c("Present" = "#bd0264ff", "Absent" = "#fd2898ff"),
"S2" = c("Present" = "#fca654ff", "Absent" = "#fedab8ff"),
"N" = c("Present" = "#a38dc2ff", "Absent" = "#d9cfe6ff"),
"E" = c("Present" = "#d4d41bff", "Absent" = "#ecec69ff"))
parents_vec <- rep(c("4", "8"), each = 4)
stim_vec <- rep(c("S1", "S2", "N", "E"), times = 2)
fs_vs_comorbid_plot_list <- map2(parents_vec, stim_vec,
function(current_parent, current_stim) {
fs_vs_x_plot_categorical(current_parent, current_stim,
xaxis = "Comorbidities_Bool", current_fill_colors = StimComorbidColors[[current_stim]],
current_outline_colors = StimComorbidOutlineColors[[current_stim]]) +
labs(x="Comorbidities") + theme(axis.text.x = element_text(color="black", size=17),
axis.title.x = element_text(color="black", size=17))
})
## Warning in wilcox.test.default(x = c(0.00823543307086614, 0.00870807086614173, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.000123818897637795,
## 0.00426889763779528, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.00817322834645669, 0.00848228346456693, :
## cannot compute exact p-value with ties
names(fs_vs_comorbid_plot_list) <- paste0(stim_vec, "_", parents_vec)
fs_vs_comorbid_plot_list
## $S1_4
##
## $S2_4
##
## $N_4
##
## $E_4
## Warning in regularize.values(x, y, ties, missing(ties)): collapsing to unique
## 'x' values
## Warning in regularize.values(x, y, ties, missing(ties)): collapsing to unique
## 'x' values
##
## $S1_8
##
## $S2_8
##
## $N_8
##
## $E_8
Look at the heatmaps again. Also focus on IFNg
Stack the plots from the different stims, this time with IFNg subsets on the RHS
A commonality to the subsets which are enriched in Hospitalized individuals is CD154.
cytokine_annotation_colors <- rep("black", 30)
cytokine_order_for_annotation = c("CD154", "IL2", "TNFa", "CD107a", "IL4/5/13", "IL17a", "IFNg")
# Adapt code from COMPASS::plotCompassResultStack to prepare the data for plotting
# First we use mergeMatricesForPlotCOMPASSResultStack to merge the data from the different runs together.
# Then use the merged data to create a fake merged COMPASSResult object with the merged categories, metadata, and mean_gamma data frames
# This should allow us to use some of the customized options in plot.COMPASSResult.ComplexHeatmap (e.g. to get the IFNg subsets on the RHS)
mergeMatricesForPlotCOMPASSResultStack_tmp <- utils::getFromNamespace("mergeMatricesForPlotCOMPASSResultStack", "COMPASS")
CohortColors_forHeatmap <- c("Conv Hosp" = "#707ed4", "Conv Non-Hosp" = "#c1ddd7", "Assay Controls" = "#ebeef0")
StimColors <- c("S1" = "#f0027f", "S2" = "#fdc086", "NCAP" = "#beaed4", "VEMP" = "#e6e63c") # https://colorbrewer2.org/?type=qualitative&scheme=Accent&n=6
row_ann_colors_v2 <- list(`Stim` = StimColors, `Cohort` = CohortColors_forHeatmap)
# CD4 runs
cd4_data2plot <- mergeMatricesForPlotCOMPASSResultStack_tmp(crList2[CD4RunNames],
threshold=0.01,
minimum_dof=1,
maximum_dof=Inf,
row_annotation = c("Stim", "Cohort"),
variable = "Stim")
## The 'threshold' filter has removed 5 categories:
## !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&TNFa, !CD107a&!CD154&IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa
if(save_output) {
saveRDS(cd4_data2plot, here::here("processed_data/20200815_Merged_CD4_COMPASS_Data.rds"))
}
cd4_merged_COMPASSResult <- crList2$`4_Spike_1`
cd4_merged_COMPASSResult$fit$mean_gamma <- cd4_data2plot$MMerged
cd4_merged_COMPASSResult$fit$categories <- cd4_data2plot$catsMerged %>% mutate_all(~ as.numeric(as.character(.))) %>% mutate(Counts = rowSums(.))
rownames(cd4_merged_COMPASSResult$fit$categories) <- rownames(cd4_data2plot$catsMerged)
cd4_merged_COMPASSResult$data$meta <- cd4_data2plot$rowannMerged
cd4_merged_COMPASSResult$data$meta$Run_SAMPLE_ID <- rownames(cd4_merged_COMPASSResult$data$meta)
cd4_merged_COMPASSResult$data$meta$Stim <- factor(dplyr::recode(sub("^4_", "", cd4_merged_COMPASSResult$data$meta$Stim), "Spike_1" = "S1", "Spike_2" = "S2"), levels = c("S1", "S2", "NCAP", "VEMP"))
cd4_merged_COMPASSResult$data$meta$Cohort <- factor(recode(cd4_merged_COMPASSResult$data$meta$Cohort,
"Hospitalized" = "Conv Hosp",
"Non-hospitalized" = "Conv Non-Hosp",
"Healthy" = "Assay Controls"),
levels = c("Conv Hosp", "Conv Non-Hosp", "Assay Controls"))
cd4_merged_COMPASSResult$data$individual_id <- "Run_SAMPLE_ID"
cd4_heatmap_by_ifng <- plot.COMPASSResult.ComplexHeatmap(cr = cd4_merged_COMPASSResult,
row_annotation = c("Stim", "Cohort"),
cytokine_order_for_annotation = cytokine_order_for_annotation,
dichotomize_by_cytokine = "IFNg",
dichotomize_by_cytokine_color = "#999999",
row_annotation_colors = row_ann_colors_v2,
staircase_cytokine_annotation = TRUE)
draw(cd4_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)
cd4_heatmap_by_ifng_gap <- plot.COMPASSResult.ComplexHeatmap(cr = cd4_merged_COMPASSResult,
row_annotation = c("Stim", "Cohort"),
cytokine_order_for_annotation = cytokine_order_for_annotation,
dichotomize_by_cytokine = "IFNg",
dichotomize_by_cytokine_color = "#999999",
row_annotation_colors = row_ann_colors_v2,
staircase_cytokine_annotation = TRUE,
row_gap = unit(0.05, "in"))
# draw(cd4_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)
# CD8 runs
cd8_data2plot <- mergeMatricesForPlotCOMPASSResultStack_tmp(crList2[CD8RunNames],
threshold=0.01,
minimum_dof=1,
maximum_dof=Inf,
row_annotation = c("Stim", "Cohort"),
variable = "Stim")
## The 'threshold' filter has removed 4 categories:
## !CD107a&!CD154&!IFNg&!IL17a&!IL2&!IL4/5/13&TNFa, !CD107a&!CD154&!IFNg&!IL17a&IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&!IL4/5/13&!TNFa, !CD107a&!CD154&!IFNg&IL17a&!IL2&IL4/5/13&!TNFa
if(save_output) {
saveRDS(cd8_data2plot, here::here("processed_data/20200815_Merged_CD8_COMPASS_Data.rds"))
}
cd8_merged_COMPASSResult <- crList2$`8_Spike_1`
cd8_merged_COMPASSResult$fit$mean_gamma <- cd8_data2plot$MMerged
cd8_merged_COMPASSResult$fit$categories <- cd8_data2plot$catsMerged %>% mutate_all(~ as.numeric(as.character(.))) %>% mutate(Counts = rowSums(.))
rownames(cd8_merged_COMPASSResult$fit$categories) <- rownames(cd8_data2plot$catsMerged)
cd8_merged_COMPASSResult$data$meta <- cd8_data2plot$rowannMerged
cd8_merged_COMPASSResult$data$meta$Run_SAMPLE_ID <- rownames(cd8_merged_COMPASSResult$data$meta)
cd8_merged_COMPASSResult$data$meta$Stim <- factor(dplyr::recode(sub("^8_", "", cd8_merged_COMPASSResult$data$meta$Stim), "Spike_1" = "S1", "Spike_2" = "S2"), levels = c("S1", "S2", "NCAP", "VEMP"))
cd8_merged_COMPASSResult$data$meta$Cohort <- factor(recode(cd8_merged_COMPASSResult$data$meta$Cohort,
"Hospitalized" = "Conv Hosp",
"Non-hospitalized" = "Conv Non-Hosp",
"Healthy" = "Assay Controls"),
levels = c("Conv Hosp", "Conv Non-Hosp", "Assay Controls"))
cd8_merged_COMPASSResult$data$individual_id <- "Run_SAMPLE_ID"
cd8_heatmap_by_ifng <- plot.COMPASSResult.ComplexHeatmap(cr = cd8_merged_COMPASSResult,
row_annotation = c("Stim", "Cohort"),
cytokine_order_for_annotation = cytokine_order_for_annotation,
dichotomize_by_cytokine = "IFNg",
dichotomize_by_cytokine_color = "#999999",
row_annotation_colors = row_ann_colors_v2,
staircase_cytokine_annotation = TRUE)
draw(cd8_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)
cd8_heatmap_by_ifng_gap <- plot.COMPASSResult.ComplexHeatmap(cr = cd8_merged_COMPASSResult,
row_annotation = c("Stim", "Cohort"),
cytokine_order_for_annotation = cytokine_order_for_annotation,
dichotomize_by_cytokine = "IFNg",
dichotomize_by_cytokine_color = "#999999",
row_annotation_colors = row_ann_colors_v2,
staircase_cytokine_annotation = TRUE,
row_gap = unit(0.05, "in"))
# draw(cd8_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)
if(save_output) {
pdf(file=here::here("out/PostCompassPlots/20200902_CD4_COMPASS_Heatmap.pdf"), width=9, height=8,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
draw(cd4_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)
dev.off()
pdf(file=here::here("out/PostCompassPlots/20200902_CD8_COMPASS_Heatmap.pdf"), width=5, height=8,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica
draw(cd8_heatmap_by_ifng, gap = unit(0.1, "in"), merge_legends = T)
dev.off()
# With row gaps
pdf(file=here::here("out/PostCompassPlots/20200902_CD4_COMPASS_Heatmap_gap.pdf"), width=9, height=9,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica.
draw(cd4_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)
dev.off()
pdf(file=here::here("out/PostCompassPlots/20200902_CD8_COMPASS_Heatmap_gap.pdf"), width=5, height=9,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial") # default unit is inches, default font is Helvetica
draw(cd8_heatmap_by_ifng_gap, gap = unit(0, "in"), merge_legends = T)
dev.off()
# Later run in linux terminal:
# convert -density 300 20200902_CD4_COMPASS_Heatmap_Edit.pdf -quality 90 20200902_CD4_COMPASS_Heatmap_Edit.png
}
CohortColors <- c("Hospitalized" = "#757bbcb2", "Non-hospitalized" = "#b0d2c8bf")
CohortOutlineColors <- c("Hospitalized" = "#757bbbff", "Non-hospitalized" = "#b0d1c8ff")
CohortLabs <- c("Hospitalized" = "H", "Non-hospitalized" = "NH")
parents_vec <- rep(c("4", "8"), each = 4)
stim_vec <- rep(c("S1", "S2", "N", "E"), times = 2)
fs_vs_cohort_plot_list <- map2(parents_vec, stim_vec, fs_vs_x_plot_categorical,
xaxis = "Cohort", current_fill_colors = CohortColors,
current_outline_colors = CohortOutlineColors, xcategorylabs = CohortLabs)
## Warning in wilcox.test.default(x = c(0.00823543307086614, 0.00870807086614173, :
## cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.000123818897637795,
## 0.00426889763779528, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.00817322834645669, 0.00848228346456693, :
## cannot compute exact p-value with ties
names(fs_vs_cohort_plot_list) <- paste0(stim_vec, "_", parents_vec)
fs_vs_cohort_plot_list
## $S1_4
##
## $S2_4
##
## $N_4
##
## $E_4
##
## $S1_8
##
## $S2_8
##
## $N_8
##
## $E_8
## Warning in regularize.values(x, y, ties, missing(ties)): collapsing to unique
## 'x' values
## Warning in regularize.values(x, y, ties, missing(ties)): collapsing to unique
## 'x' values
# FS vs demographics plots to disk
if(save_output) {
pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig4CDEF_CD4_FS_vs_Demographics.pdf"), width=14, height=14,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
(fs_vs_age_plot_list$S1_4 | fs_vs_age_plot_list$S2_4 | fs_vs_age_plot_list$N_4 | fs_vs_age_plot_list$E_4) /
(fs_vs_sex_plot_list$S1_4 | fs_vs_sex_plot_list$S2_4 | fs_vs_sex_plot_list$N_4 | fs_vs_sex_plot_list$E_4) /
(fs_vs_days_plot_list$S1_4 | fs_vs_days_plot_list$S2_4 | fs_vs_days_plot_list$N_4 | fs_vs_days_plot_list$E_4) /
(fs_vs_cohort_plot_list$S1_4 | fs_vs_cohort_plot_list$S2_4 | fs_vs_cohort_plot_list$N_4 | fs_vs_cohort_plot_list$E_4)
dev.off()
pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig5HIJK_CD8_FS_vs_Demographics.pdf"), width=14, height=14,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
(fs_vs_age_plot_list$S1_8 | fs_vs_age_plot_list$S2_8 | fs_vs_age_plot_list$N_8 | fs_vs_age_plot_list$E_8) /
(fs_vs_sex_plot_list$S1_8 | fs_vs_sex_plot_list$S2_8 | fs_vs_sex_plot_list$N_8 | fs_vs_sex_plot_list$E_8) /
(fs_vs_days_plot_list$S1_8 | fs_vs_days_plot_list$S2_8 | fs_vs_days_plot_list$N_8 | fs_vs_days_plot_list$E_8) /
(fs_vs_cohort_plot_list$S1_8 | fs_vs_cohort_plot_list$S2_8 | fs_vs_cohort_plot_list$N_8 | fs_vs_cohort_plot_list$E_8)
dev.off()
pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig4CDEFG_CD4_FS_vs_Demographics_v2.pdf"), width=14, height=17.5,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
(fs_vs_age_plot_list$S1_4 | fs_vs_age_plot_list$S2_4 | fs_vs_age_plot_list$N_4 | fs_vs_age_plot_list$E_4) /
(fs_vs_sex_plot_list$S1_4 | fs_vs_sex_plot_list$S2_4 | fs_vs_sex_plot_list$N_4 | fs_vs_sex_plot_list$E_4) /
(fs_vs_days_plot_list$S1_4 | fs_vs_days_plot_list$S2_4 | fs_vs_days_plot_list$N_4 | fs_vs_days_plot_list$E_4) /
(fs_vs_comorbid_plot_list$S1_4 | fs_vs_comorbid_plot_list$S2_4 | fs_vs_comorbid_plot_list$N_4 | fs_vs_comorbid_plot_list$E_4) /
(fs_vs_cohort_plot_list$S1_4 | fs_vs_cohort_plot_list$S2_4 | fs_vs_cohort_plot_list$N_4 | fs_vs_cohort_plot_list$E_4)
dev.off()
pdf(file=here::here("out/PostCompassPlots/FS_Plots/Fig5HIJKL_CD8_FS_vs_Demographics_v2.pdf"), width=14, height=17.5,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
(fs_vs_age_plot_list$S1_8 | fs_vs_age_plot_list$S2_8 | fs_vs_age_plot_list$N_8 | fs_vs_age_plot_list$E_8) /
(fs_vs_sex_plot_list$S1_8 | fs_vs_sex_plot_list$S2_8 | fs_vs_sex_plot_list$N_8 | fs_vs_sex_plot_list$E_8) /
(fs_vs_days_plot_list$S1_8 | fs_vs_days_plot_list$S2_8 | fs_vs_days_plot_list$N_8 | fs_vs_days_plot_list$E_8) /
(fs_vs_comorbid_plot_list$S1_8 | fs_vs_comorbid_plot_list$S2_8 | fs_vs_comorbid_plot_list$N_8 | fs_vs_comorbid_plot_list$E_8) /
(fs_vs_cohort_plot_list$S1_8 | fs_vs_cohort_plot_list$S2_8 | fs_vs_cohort_plot_list$N_8 | fs_vs_cohort_plot_list$E_8)
dev.off()
}
source(here::here("scripts/20200604_Helper_Functions.R"))
# Arial is used by dotplot function below
# Note p-values are bonferroni adjusted in dotplots
# Also note that some points are not shown in order to zoom in better to the bulk of the point
bg_corr_dotplots <- pmap(.l = list(c(CD4RunNames, CD8RunNames),
rep(c("CD4+", "CD8"), each = 4),
list(NULL, c(0, 0.0085), NULL, NULL,
NULL, NULL, c(0, 0.005), NULL),
c(5, 5, 5, 5,
5, 5, 5, 5)),
.f = function(n, p, y, p_text_size) {
# "staircase effect" for categories df heatmap gets done automatically, unlike in heatmap
# Returned dotplot is cowplot
make_dotplot_for_COMPASS_run(crList.no_healthy[[n]], n, parentSubset = p,
return_output = T, current_ylim = y, include_0_line=T,
cytokine_order_for_annotation=cytokine_order_for_annotation,
p_text_size=p_text_size, group_by_colors=CohortColors,
zeroed_BgCorr_stats = FALSE, zeroed_BgCorr_plot = TRUE,
point_size = 1.2, dichotomize_by_cytokine = "IFNg")
})
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
names(bg_corr_dotplots) <- c(CD4RunNames, CD8RunNames)
if(save_output) {
# Save the output of make_dotplot_for_COMPASS_run to disk so we can use the extracted data for follow-up analysis
saveRDS(bg_corr_dotplots, here::here("processed_data/20200824_make_dotplot_for_COMPASS_run_list.rds"))
}
for(n in names(bg_corr_dotplots)) {
print(plot_grid(ggplot() +
theme_void() +
geom_text(aes(0,0,label=sprintf("CD%s %s", sub("([48]).*", "\\1", n), sub("_", " ", sub("[48]_(.*)", "\\1", n)))), size=10) +
xlab(NULL),
bg_corr_dotplots[[n]]$Dotplot,
ncol=1, rel_heights = c(0.1, 1)))
}
# How many points are out of bounds of the plot for plots where boundaries were specified?
bg_corr_dotplots$`4_Spike_2`$BgCorrMagnitudes %>% dplyr::filter_if(is.numeric, any_vars(. > 0.0085)) %>%
select(-Individual, -Cohort) %>%
select_if(~max(.) > 0.0085)
## # A tibble: 1 x 1
## `!IL2&IL4/5/13&!IFNg&!TNFa&!IL17a&!CD154&!CD107a`
## <dbl>
## 1 0.0158
bg_corr_dotplots$`8_NCAP`$BgCorrMagnitudes %>% dplyr::filter_if(is.numeric, any_vars(. > 0.005)) %>%
select(-Individual, -Cohort) %>%
select_if(~max(.) > 0.005)
## # A tibble: 1 x 3
## `!IL2&!IL4/5/13&IFNg&!TNF… `!IL2&!IL4/5/13&IFNg&TNF… `!IL2&!IL4/5/13&IFNg&TNF…
## <dbl> <dbl> <dbl>
## 1 0.0105 0.0129 0.00746
if(save_output) {
pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5A_CD4_S1_COMPASS_Magnitudes_vs_Cohort.pdf"), width=12, height=7,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
bg_corr_dotplots[["4_Spike_1"]]$Dotplot
dev.off()
pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5B_CD4_S2_COMPASS_Magnitudes_vs_Cohort.pdf"), width=11.5, height=7,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
bg_corr_dotplots[["4_Spike_2"]]$Dotplot
dev.off()
pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5C_CD4_N_COMPASS_Magnitudes_vs_Cohort.pdf"), width=14, height=7,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
bg_corr_dotplots[["4_NCAP"]]$Dotplot
dev.off()
pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5D_CD4_E_COMPASS_Magnitudes_vs_Cohort.pdf"), width=3, height=7,
onefile = TRUE, bg = "transparent", family = "Arial", fonts = "Arial")
bg_corr_dotplots[["4_VEMP"]]$Dotplot
dev.off()
}
sig_tests_tally <- lapply(names(bg_corr_dotplots), function(n) {length(which(bg_corr_dotplots[[n]]$Test_Results$p.adj < 0.05))})
names(sig_tests_tally) <- names(bg_corr_dotplots)
sig_tests_tally # make sure all significant comparisons were plotted and accounted for
## $`4_Spike_1`
## [1] 9
##
## $`4_Spike_2`
## [1] 4
##
## $`4_NCAP`
## [1] 5
##
## $`4_VEMP`
## [1] 0
##
## $`8_Spike_1`
## [1] 0
##
## $`8_Spike_2`
## [1] 0
##
## $`8_NCAP`
## [1] 0
##
## $`8_VEMP`
## [1] 0
Save to disk: background-corrected magnitudes from subsets which were significantly different across hospitalization status. For integrated analysis with other T cell and Ab data.
if(save_output) {
signif_bgcorr_dat_list <- lapply(c(CD4RunNames, CD8RunNames), function(runName) {
signif_subset_names <- bg_corr_dotplots[[runName]]$Test_Results %>% dplyr::filter(p.adj < 0.05) %>% dplyr::pull(BooleanSubset)
bg_corr_dotplots[[runName]]$BgCorrMagnitudes %>%
rename("SAMPLE_ID" = "Individual") %>%
dplyr::select(c("SAMPLE_ID", signif_subset_names)) %>%
rename_at(vars(signif_subset_names), ~ paste("CD", sub("Spike_", "S", runName), " ", ., sep = ""))
})
names(signif_bgcorr_dat_list) <- c(CD4RunNames, CD8RunNames)
bgcorr_dat_2save <- purrr::reduce(signif_bgcorr_dat_list, full_join, by = "SAMPLE_ID") %>%
# Convert proportions into percents
# Convert the numeric columns to character type after rounding to 20 digits. This will help ensure consistency when writing and reading the data to a file
mutate_at(vars(-"SAMPLE_ID"), ~ format(. * 100, digits = 20))
write.csv(bgcorr_dat_2save, here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), row.names = F)
# bgcorr_dat_2save <- read.csv(here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), stringsAsFactors = F, check.names = F)
# all.equal(bgcorr_dat_2save %>% mutate_at(vars(-"SAMPLE_ID"), as.numeric),
# read.csv(here::here("processed_data/20200813_HAARVI_Signif_COMPASS_Subsets_Background_Corrected_Percents_pt1.csv"), stringsAsFactors = F, check.names = F))
}
# Note p-values are bonferroni adjusted in dotplots
bg_corr_boxplots <- pmap(.l = list(CD4RunNames,
c("CD4+", "CD4+", "CD4+", "CD4+"),
list(c(0, 0.004), c(0, 0.0061), NULL, NULL), # list(c(0, 0.0041), c(0, 0.0062), c(0, 0.0048), c(0, 0.028)),
c(5, 5, 5, 5),
c(1.3, 1.3, 1.3, -3.5),
list("IFNg", "IFNg", "IFNg", NULL),
c("Spike 1", "Spike 2", "NCAP", "VEMP")),
.f = function(n, p, y, p_text_size, left_cats_heatmap_padding, dichotomize_by_cytokine, main_title) {
# "staircase effect" for categories df heatmap gets done automatically, unlike in heatmap
# Returned plot is gtable
make_boxplot_for_COMPASS_run(crList.no_healthy[[n]], n, parentSubset = p, add_legend = T,
current_ylim = y, include_0_line=T,
cytokine_order_for_annotation=cytokine_order_for_annotation,
p_text_size=p_text_size,
group_by_order=c("Hospitalized", "Non-hospitalized"),
group_by_colors=CohortColors,
group_by_labels = c("Conv Hosp", "Conv Non-Hosp"),
zeroed_BgCorr_stats = FALSE, zeroed_BgCorr_plot = TRUE,
dichotomize_by_cytokine = dichotomize_by_cytokine, dichotomize_by_cytokine_color="#999999",
cats_heatmap_left_padding=left_cats_heatmap_padding, main_title = main_title)
})
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
## Warning in wilcox.test.default(x = c(9.13129153996247e-06,
## -3.5816939621594e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(6.55268825515531e-05,
## 2.63368127189248e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(1.67578931262956e-05,
## 9.63930159703005e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.000373895585593641,
## 0.000158020876313549, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0, 0, 2.90601899734598e-06,
## 3.22018593367427e-06, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0, 8.77893757297492e-06, 0, 0,
## 2.49937515621095e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0, 0, 9.7929764772705e-06,
## 1.02971765141998e-05, : cannot compute exact p-value with ties
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning in wilcox.test.default(x = c(1.84815740091305e-05,
## -3.5816939621594e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0, 6.91276095672612e-05,
## 8.92319361099338e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(9.3577729959057e-05,
## 0.000129300984229124, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.000759032486590426,
## 0.000138255219134522, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.00020240866309078,
## 0.000172819023918153, : cannot compute exact p-value with ties
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
## Warning: Removed 3 rows containing non-finite values (stat_boxplot).
## Warning in wilcox.test.default(x = c(8.48003522771347e-06,
## -1.12650122952989e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(4.74664768007595e-05,
## 7.36557819788853e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(5.56620341480128e-05,
## 5.72878304280219e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0, 0, -6.88695747992452e-06,
## 1.05949038512476e-05, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(4.77509546812196e-05,
## 0.000154725280603236, : cannot compute exact p-value with ties
## Warning in wilcox.test.default(x = c(0.000379731814406076,
## 0.000171863491284066, : cannot compute exact p-value with ties
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
## `summarise()` regrouping output by 'Cohort' (override with `.groups` argument)
## Joining, by = "BooleanSubset"
## Joining, by = "BooleanSubset"
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## `summarise()` ungrouping output (override with `.groups` argument)
## Warning: Ignoring unknown aesthetics: xmin, xmax, annotations, y_position
names(bg_corr_boxplots) <- CD4RunNames
if(save_output) {
cairo_pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5A_CD4_S1_COMPASS_Magnitudes_vs_Cohort_Boxplots.pdf"), width=12, height=7.2,
onefile = TRUE, bg = "transparent", family = "Arial")
grid.draw(bg_corr_boxplots[["4_Spike_1"]]$Plot)
dev.off()
cairo_pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5B_CD4_S2_COMPASS_Magnitudes_vs_Cohort_Boxplots.pdf"), width=11.5, height=7.2,
onefile = TRUE, bg = "transparent", family = "Arial")
grid.draw(bg_corr_boxplots[["4_Spike_2"]]$Plot)
dev.off()
cairo_pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5C_CD4_N_COMPASS_Magnitudes_vs_Cohort_Boxplots.pdf"), width=14, height=7.2,
onefile = TRUE, bg = "transparent", family = "Arial")
grid.draw(bg_corr_boxplots[["4_NCAP"]]$Plot)
dev.off()
cairo_pdf(file=here::here("out/PostCompassPlots/Magnitude_Plots/FigS5D_CD4_E_COMPASS_Magnitudes_vs_Cohort_Boxplots.pdf"), width=3, height=7.2,
onefile = TRUE, bg = "transparent", family = "Arial")
grid.draw(bg_corr_boxplots[["4_VEMP"]]$Plot)
dev.off()
}